************************************
*********** PROJECT INFO ***********
************************************

// Record Linkage for Character-Based Names
// 1880-1900 balance test
// Author: Hannah Postel
// Date: 10/26/2022


***********************************
*********** 1880 VALUES ***********
***********************************

*** POPULATION MEANS ***

// Load the 1880-1900 ABE crosswalk, which covers the total population
// rather than only Chinese individuals. Restricting to ABE-standard matches
// is also what removes duplicate IDs ahead of the 1:1 merge (per the
// original author's note).
use "$data/crosswalk_1880_1900.dta", clear
keep if abe_exact_standard == 1

// Attach the full-count 1880 Chinese file. Rows present only in the
// crosswalk (_merge == 1) have no record in the Chinese file, so only the
// using-only (2) and matched (3) rows are retained.
merge 1:1 histid_1880 using "$data/1880_chinese.dta"
keep if inlist(_merge, 2, 3)

// Population means: continuous variables
summarize age sei

// Population means: indicators derived from categorical codes.
// Each indicator is 1 when the code equals the stated value and 0 for every
// other value, including a missing code.
generate single = (marst == 6)
generate lit2   = (lit == 4)
generate ca     = (stateicp == 71)

foreach dummy in single lit2 ca {
    summarize `dummy'
}


*** ABE BALANCE TEST ***

// Flag individuals linked by the ABE-standard method: 1 when
// abe_exact_standard equals 1, 0 for any other value (including missing,
// which is what unlinked rows from the Chinese file carry after the merge).
generate abe_match = (abe_exact_standard == 1)

// Balance test: regress each covariate on the match flag using
// heteroskedasticity-robust standard errors.
foreach outcome in age sei single lit2 ca {
    regress `outcome' abe_match, vce(robust)
}


*** POSTEL BALANCE TEST ***

// Load the Postel matches and attach the full-count 1880 Chinese data.
use "$data/matches 26 oct.dta", clear
merge 1:1 histid_1880 using "$data/1880_chinese.dta"

// Keep only individuals present in the Chinese file, mirroring the ABE
// sections of this script. A match record with no counterpart in the
// Chinese file (_merge == 1) would otherwise be flagged as matched while
// its indicators below are coded 0 purely because the source codes are
// absent. NOTE(review): assumes the covariates come from the using file --
// confirm against the contents of the matches file.
drop if _merge == 1

// Flag Postel matches: 1 when the string variable step is non-empty,
// 0 when it is empty (as it is for rows that came only from the Chinese file).
generate postel_match = (step != "")

// Indicators derived from categorical codes: 1 when the code equals the
// stated value, 0 for every other value (including missing).
generate single = (marst == 6)
generate lit2   = (lit == 4)
generate ca     = (stateicp == 71)

// Balance test: regress each covariate on the match flag using
// heteroskedasticity-robust standard errors.
foreach outcome in age sei single lit2 ca {
    regress `outcome' postel_match, vce(robust)
}


***********************************
*********** 1900 VALUES ***********
***********************************

*** POPULATION MEANS *** 

// Load the 1880-1900 ABE crosswalk, which covers the total population
// rather than only Chinese individuals. Restricting to ABE-standard matches
// is also what removes duplicate IDs ahead of the 1:1 merge (per the
// original author's note).
use "$data/crosswalk_1880_1900.dta", clear
keep if abe_exact_standard == 1

// Attach the full-count 1900 Chinese file. Rows present only in the
// crosswalk (_merge == 1) have no record in the Chinese file, so only the
// using-only (2) and matched (3) rows are retained.
merge 1:1 histid_1900 using "$data/1900_chinese.dta"
keep if inlist(_merge, 2, 3)

// Population means: continuous variables
summarize age sei yrimmig

// Population means: indicators derived from categorical codes.
// Each indicator is 1 when the code equals the stated value and 0 for every
// other value, including a missing code.
generate single  = (marst == 6)
generate english = (speakeng == 2)
generate lit2    = (lit == 4)
generate ca      = (stateicp == 71)

foreach dummy in single english lit2 ca {
    summarize `dummy'
}


*** ABE BALANCE TEST ***

// Flag individuals linked by the ABE-standard method: 1 when
// abe_exact_standard equals 1, 0 for any other value (including missing,
// which is what unlinked rows from the Chinese file carry after the merge).
generate abe_match = (abe_exact_standard == 1)

// Balance test: regress each covariate on the match flag using
// heteroskedasticity-robust standard errors.
// NOTE(review): yrimmig and english are summarized in the 1900 population
// means but are not part of this balance test -- confirm that is intended.
foreach outcome in age sei single lit2 ca {
    regress `outcome' abe_match, vce(robust)
}


*** POSTEL BALANCE TEST ***

// Load the Postel matches and attach the full-count 1900 Chinese data.
use "$data/matches 26 oct.dta", clear
merge 1:1 histid_1900 using "$data/1900_chinese.dta"

// Keep only individuals present in the Chinese file, mirroring the ABE
// sections of this script. A match record with no counterpart in the
// Chinese file (_merge == 1) would otherwise be flagged as matched while
// its indicators below are coded 0 purely because the source codes are
// absent. NOTE(review): assumes the covariates come from the using file --
// confirm against the contents of the matches file.
drop if _merge == 1

// Flag Postel matches: 1 when the string variable step is non-empty,
// 0 when it is empty (as it is for rows that came only from the Chinese file).
generate postel_match = (step != "")

// Indicators derived from categorical codes: 1 when the code equals the
// stated value, 0 for every other value (including missing).
generate single = (marst == 6)
generate lit2   = (lit == 4)
generate ca     = (stateicp == 71)

// Balance test: regress each covariate on the match flag using
// heteroskedasticity-robust standard errors.
foreach outcome in age sei single lit2 ca {
    regress `outcome' postel_match, vce(robust)
}
